This document is a practice analysis of the Zeisel dataset.

Data Import

Import dataset

# Import the Zeisel dataset: the expression table (first CSV column becomes the
# row names) and the tab-separated per-cell annotation table.
dat <- read.csv("Zeisel_preprocessed.csv", row.names = 1)
cell_type <- read.table("Zeisel_cell_info.txt", sep = "\t", header = TRUE)

# Integer label per cell, derived from the level1class annotation.
# NOTE(review): rows of `dat` and `cell_type` are indexed with the same
# positions below, which assumes both tables list the cells in the same
# order -- confirm against the input files.
cluster_labels <- as.numeric(as.factor(cell_type$level1class))
set.seed(10)

# Fixed-seed random subset of 300 rows used by every analysis below.
rand_ind <- sample(nrow(dat), 300)
sub_dat <- dat[rand_ind, ]

sub_celltype <- cell_type[rand_ind, ]
# Reuse the full-data codes rather than re-factoring the subset: rebuilding the
# factor from 300 rows would renumber the labels if a class were absent from
# the sample, so colours would no longer agree with `cluster_labels`.
sub_cluster_labels <- cluster_labels[rand_ind]

Dependence Measures

1. Pearson’s correlation coefficient

  • it measures the linear dependence.
  • the runtime is very short compared to other methods.
# Pearson correlation between every pair of columns of the sampled data.
cor_pearson_mat <- stats::cor(sub_dat, method = "pearson")

# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(cor_pearson_mat) <- upper.tri(cor_pearson_mat, diag = TRUE)
cor_pearson_mat[seq_len(5), seq_len(5)]
##             Vip         Sst       Npy      Reln Cnr1
## Vip          NA          NA        NA        NA   NA
## Sst  -0.1168810          NA        NA        NA   NA
## Npy   0.6467325  0.30057685        NA        NA   NA
## Reln  0.6132735  0.15881539 0.9368308        NA   NA
## Cnr1  0.8533715 -0.07170653 0.6344162 0.6912581   NA
# Plot all |Pearson| values from largest to smallest (sort() drops the NAs of
# the masked triangle).
cor_pearson_vec <- sort(abs(cor_pearson_mat), decreasing = TRUE)
plot(cor_pearson_vec)

# Scatter plots of the four pairs with the largest |Pearson| correlation.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair; matching on the value with which(... == x) returns several rows when
# values tie, and idx[2] is then a second row index instead of a column index.
par(mfrow = c(2, 2))
abs_pearson <- abs(cor_pearson_mat)
top_cells <- head(order(abs_pearson, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_pearson))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest |Pearson| correlation.
# order() ranks the non-NA cells in increasing order and yields one distinct
# (row, column) pair per panel, also when several pairs share the same value.
par(mfrow = c(2, 2))
abs_pearson <- abs(cor_pearson_mat)
bottom_cells <- head(order(abs_pearson, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_pearson))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

2. Spearman’s correlation coefficient

  • captures monotonic relationships within the data.
  • the runtime is very short compared to other methods.
# Spearman rank correlation between every pair of columns of the sampled data.
cor_spearman_mat <- stats::cor(sub_dat, method = "spearman")

# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(cor_spearman_mat) <- upper.tri(cor_spearman_mat, diag = TRUE)
cor_spearman_mat[seq_len(5), seq_len(5)]
##              Vip        Sst        Npy      Reln Cnr1
## Vip           NA         NA         NA        NA   NA
## Sst  -0.02997322         NA         NA        NA   NA
## Npy   0.29338015 0.45339704         NA        NA   NA
## Reln -0.02916744 0.26181491 0.65739042        NA   NA
## Cnr1  0.22833898 0.01620062 0.08367782 0.2958033   NA
# Plot all |Spearman| values from largest to smallest (sort() drops the NAs of
# the masked triangle).
cor_spearman_vec <- sort(abs(cor_spearman_mat), decreasing = TRUE)
plot(cor_spearman_vec)

# Scatter plots of the four pairs with the largest |Spearman| correlation.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair; matching on the value with which(... == x) returns several rows when
# values tie, and idx[2] is then a second row index instead of a column index.
par(mfrow = c(2, 2))
abs_spearman <- abs(cor_spearman_mat)
top_cells <- head(order(abs_spearman, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_spearman))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest |Spearman| correlation.
# order() ranks the non-NA cells in increasing order and yields one distinct
# (row, column) pair per panel, also when several pairs share the same value.
par(mfrow = c(2, 2))
abs_spearman <- abs(cor_spearman_mat)
bottom_cells <- head(order(abs_spearman, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_spearman))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

3. Kendall’s correlation coefficient, τ

  • alternative method to Spearman’s correlations, identifying monotonic relationships.
  • it takes more time than Pearson and Spearman, so I apply this method to a 300-row subset of the dataset.
# Kendall's tau between every pair of columns of the sampled data.
cor_kendall_mat <- stats::cor(sub_dat, method = "kendall")

# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(cor_kendall_mat) <- upper.tri(cor_kendall_mat, diag = TRUE)
cor_kendall_mat[seq_len(5), seq_len(5)]
##             Vip        Sst       Npy      Reln Cnr1
## Vip          NA         NA        NA        NA   NA
## Sst  0.08704571         NA        NA        NA   NA
## Npy  0.30934225 0.40294314        NA        NA   NA
## Reln 0.08860647 0.21814939 0.5280268        NA   NA
## Cnr1 0.23799331 0.08026756 0.1843032 0.2985507   NA
# Plot all |Kendall| values from largest to smallest (sort() drops the NAs of
# the masked triangle).
cor_kendall_vec <- sort(abs(cor_kendall_mat), decreasing = TRUE)
plot(cor_kendall_vec)

# Scatter plots of the four pairs with the largest |Kendall| correlation.
# The cells are ranked with order(). Comparing the whole matrix against the
# whole sorted vector recycles the shorter operand (hence the "longer object
# length" warning) and only flags cells that happen to equal the recycled
# value at the same position, which are not the extreme pairs.
par(mfrow = c(2, 2))
abs_kendall <- abs(cor_kendall_mat)
top_cells <- head(order(abs_kendall, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_kendall))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(cor_kendall_mat) == cor_kendall_vec: longer object length is not
## a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == cor_kendall_vec: longer object length is not
## a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == cor_kendall_vec: longer object length is not
## a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == cor_kendall_vec: longer object length is not
## a multiple of shorter object length

# Scatter plots of the four pairs with the smallest |Kendall| correlation.
# The cells are ranked with order(). Comparing the whole matrix against the
# whole reversed sorted vector recycles the shorter operand (hence the "longer
# object length" warning) and does not select the extreme pairs.
par(mfrow = c(2, 2))
abs_kendall <- abs(cor_kendall_mat)
bottom_cells <- head(order(abs_kendall, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_kendall))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(cor_kendall_mat) == rev(cor_kendall_vec): longer object length is
## not a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == rev(cor_kendall_vec): longer object length is
## not a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == rev(cor_kendall_vec): longer object length is
## not a multiple of shorter object length

## Warning in abs(cor_kendall_mat) == rev(cor_kendall_vec): longer object length is
## not a multiple of shorter object length

3-1. Faster Kendall’s tau (pcaPP)

library(pcaPP)

# Kendall's tau for every pair of columns, computed with pcaPP::cor.fk().
faster_kendall_mat <- cor.fk(sub_dat)

# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(faster_kendall_mat) <- upper.tri(faster_kendall_mat, diag = TRUE)
faster_kendall_mat[seq_len(5), seq_len(5)]
##              Vip         Sst        Npy      Reln Cnr1
## Vip           NA          NA         NA        NA   NA
## Sst  -0.02501672          NA         NA        NA   NA
## Npy   0.20004459 0.339442586         NA        NA   NA
## Reln -0.04267559 0.188762542 0.48918618        NA   NA
## Cnr1  0.17217391 0.009855072 0.04807135 0.1974136   NA
# Plot all |tau| values from largest to smallest (sort() drops the NAs of the
# masked triangle).
faster_kendall_vec <- sort(abs(faster_kendall_mat), decreasing = TRUE)
plot(faster_kendall_vec)

# Scatter plots of the four pairs with the largest |tau|.
# The cells are ranked with order(). Comparing the whole matrix against the
# whole sorted vector recycles the shorter operand (hence the "longer object
# length" warning) and only flags cells that happen to equal the recycled
# value at the same position, which are not the extreme pairs.
par(mfrow = c(2, 2))
abs_fk <- abs(faster_kendall_mat)
top_cells <- head(order(abs_fk, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_fk))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ",
                     round(faster_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == faster_kendall_vec: longer object length
## is not a multiple of shorter object length

# Scatter plots of the four pairs with the smallest |tau|.
# The cells are ranked with order(). Comparing the whole matrix against the
# whole reversed sorted vector recycles the shorter operand (hence the "longer
# object length" warning) and does not select the extreme pairs.
par(mfrow = c(2, 2))
abs_fk <- abs(faster_kendall_mat)
bottom_cells <- head(order(abs_fk, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_fk))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ",
                     round(faster_kendall_mat[idx1, idx2], 3)))
}
## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

## Warning in abs(faster_kendall_mat) == rev(faster_kendall_vec): longer object
## length is not a multiple of shorter object length

4. Distance correlation

  • it is a fully non-parametric measure that identifies non-linear relationships between two random variables with energy distances
library(energy)

# Distance correlation for every pair of columns of sub_dat, written into the
# lower triangle only. matrix() without data starts as all NA, so the diagonal
# and the upper triangle stay NA without a separate masking step.
n_vars <- ncol(sub_dat)
dist_cor_mat <- matrix(nrow = n_vars, ncol = n_vars)

# seq_len(n_vars)[-1] is empty for a single column, whereas 2:ncol(sub_dat)
# would count down to c(2, 1) and index a column that does not exist.
for (i in seq_len(n_vars)[-1]) {
  x_i <- as.numeric(sub_dat[, i])  # converted once per row, not once per pair
  for (j in seq_len(i - 1)) {
    dist_cor_mat[i, j] <- dcor(x_i, as.numeric(sub_dat[, j]))
  }
}
dist_cor_mat[seq_len(5), seq_len(5)]
##           [,1]      [,2]      [,3]      [,4] [,5]
## [1,]        NA        NA        NA        NA   NA
## [2,] 0.2584206        NA        NA        NA   NA
## [3,] 0.6414468 0.5080579        NA        NA   NA
## [4,] 0.6261122 0.3853323 0.9360060        NA   NA
## [5,] 0.7433247 0.2485128 0.6455535 0.7043314   NA
# Plot all distance correlations from largest to smallest (sort() drops the
# NAs of the unfilled triangle).
dist_cor_vec <- sort(abs(dist_cor_mat), decreasing = TRUE)
plot(dist_cor_vec)

# Scatter plots of the four pairs with the largest distance correlation.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair; matching on the value with which(... == x) returns several rows when
# values tie, and idx[2] is then a second row index instead of a column index.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))
abs_dist_cor <- abs(dist_cor_mat)
top_cells <- head(order(abs_dist_cor, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_dist_cor))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(dist_cor_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest distance correlation.
# order() ranks the non-NA cells in increasing order and yields one distinct
# (row, column) pair per panel, also when several pairs share the same value.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))
abs_dist_cor <- abs(dist_cor_mat)
bottom_cells <- head(order(abs_dist_cor, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_dist_cor))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(dist_cor_mat[idx1, idx2], 3)))
}

5. Hoeffding’s D measure

  • tests the independence of two variables by calculating the distance between their joint distribution and the product of their marginal distributions.
library(Hmisc)

# Run Hmisc::hoeffd() on the sampled data (as a matrix) and keep its D
# component.
hoeffd_cor_mat <- hoeffd(x = as.matrix(sub_dat))

hoeff_dist <- hoeffd_cor_mat[["D"]]

# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(hoeff_dist) <- upper.tri(hoeff_dist, diag = TRUE)
# Plot all |D| values from largest to smallest (sort() drops the NAs of the
# masked triangle).
cor_hoeff_vec <- sort(abs(hoeff_dist), decreasing = TRUE)
plot(cor_hoeff_vec)

# Scatter plots of the four pairs with the largest |D|.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair; matching on the value with which(... == x) returns several rows when
# values tie, and idx[2] is then a second row index instead of a column index.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))
abs_hoeff <- abs(hoeff_dist)
top_cells <- head(order(abs_hoeff, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_hoeff))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(hoeff_dist[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest |D|.
# order() ranks the non-NA cells in increasing order and yields one distinct
# (row, column) pair per panel; always taking the first match of
# which(... == x) draws the same pair repeatedly when values tie.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))
abs_hoeff <- abs(hoeff_dist)
bottom_cells <- head(order(abs_hoeff, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_hoeff))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(hoeff_dist[idx1, idx2], 3)))
}

6. Mutual information (MI)

  • measures how much one random variable tells us about another.
library(entropy)

# Empirical mutual information for every pair of columns of sub_dat, from a
# 20 x 20 binning of the two variables. Only the lower triangle is filled, as
# for the other dependence matrices: the diagonal (a variable paired with
# itself) and the mirrored upper triangle stay NA, so they cannot dominate the
# ranking of pairs and the number of discretize2d() calls is roughly halved.
n_vars <- ncol(sub_dat)
mi_cor_mat <- matrix(nrow = n_vars, ncol = n_vars)

for (i in seq_len(n_vars)[-1]) {
  x_i <- as.matrix(sub_dat[, i])  # converted once per row, not once per pair
  for (j in seq_len(i - 1)) {
    y2d <- discretize2d(x_i,
                        as.matrix(sub_dat[, j]),
                        numBins1 = 20,
                        numBins2 = 20)
    mi_cor_mat[i, j] <- as.numeric(mi.empirical(y2d))
  }
}
# Rank on the strictly lower triangle of |MI| so that self-pairs (the
# diagonal) and mirrored duplicates never enter the ranking; the masking is a
# no-op when mi_cor_mat already has NA there.
abs_mi <- abs(mi_cor_mat)
abs_mi[upper.tri(abs_mi, diag = TRUE)] <- NA

# Plot all MI values from largest to smallest (sort() drops the NAs).
cor_mi_vec <- sort(abs_mi, decreasing = TRUE)
plot(cor_mi_vec)

# Scatter plots of the four pairs with the largest mutual information.
# order() yields one distinct (row, column) pair per panel, also when several
# pairs share the same value.
par(mfrow = c(2, 2))
top_cells <- head(order(abs_mi, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_mi))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(mi_cor_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest mutual information.
# Ranking uses the strictly lower triangle of |MI| (a no-op mask when
# mi_cor_mat already has NA there) so each unordered pair is considered once,
# and order() yields one distinct (row, column) pair per panel even with ties.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))
abs_mi <- abs(mi_cor_mat)
abs_mi[upper.tri(abs_mi, diag = TRUE)] <- NA
bottom_cells <- head(order(abs_mi, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_mi))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]

  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(mi_cor_mat[idx1, idx2], 3)))
}

7. Maximal Information Coefficient (MIC)

library(minerva)

# Run minerva::mine() on the sampled data and keep its MIC matrix.
cor_MIC <- mine(sub_dat)
cor_MIC_mat <- cor_MIC[["MIC"]]
# Keep each unordered pair once: blank the diagonal and the upper triangle.
is.na(cor_MIC_mat) <- upper.tri(cor_MIC_mat, diag = TRUE)

# Plot all MIC values from largest to smallest (sort() drops the NAs of the
# masked triangle).
cor_MIC_vec <- sort(abs(cor_MIC_mat), decreasing = TRUE)
plot(cor_MIC_vec)

# Scatter plots of the four pairs with the largest MIC.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair. Taking row i of which(... == cor_MIC_vec[i]) only works when at least
# i cells share that value; otherwise the subscript is out of bounds.
par(mfrow = c(2, 2))

abs_mic <- abs(cor_MIC_mat)
top_cells <- head(order(abs_mic, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_mic))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]
  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest MIC.
# order() ranks the non-NA cells in increasing order and yields one distinct
# (row, column) pair per panel; matching on the value with which(... == x)
# returns several rows when values tie, and idx[2] is then a second row index.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))

abs_mic <- abs(cor_MIC_mat)
bottom_cells <- head(order(abs_mic, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_mic))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]
  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(cor_MIC_mat[idx1, idx2], 3)))
}

8. Chatterjee’s method

library(XICOR)

# Chatterjee's xi for every pair of columns of sub_dat. Entry [i, j] is
# calculateXI(column i, column j) for i > j. Only the lower triangle is
# computed: the diagonal and upper triangle were overwritten with NA right
# after being computed, so skipping them gives the same matrix with about half
# the calls. matrix() without data starts as all NA.
# NOTE(review): assumes each calculateXI() call is independent of the calls
# made before it (it takes its own `seed` argument) -- confirm in XICOR docs.
n_vars <- ncol(sub_dat)
XI_cor_mat <- matrix(nrow = n_vars, ncol = n_vars)

for (i in seq_len(n_vars)[-1]) {
  x_i <- as.numeric(sub_dat[, i])  # converted once per row, not once per pair
  for (j in seq_len(i - 1)) {
    XI_cor_mat[i, j] <- calculateXI(x_i, as.numeric(sub_dat[, j]))
  }
}

# Plot all |xi| values from largest to smallest (sort() drops the NAs of the
# masked triangle).
XI_cor_vec <- sort(abs(XI_cor_mat), decreasing = TRUE)
plot(XI_cor_vec)

# Scatter plots of the four pairs with the largest |xi|.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair; matching on the value with which(... == x) returns several rows when
# values tie, and idx[2] is then a second row index instead of a column index.
par(mfrow = c(2, 2))
abs_xi <- abs(XI_cor_mat)
top_cells <- head(order(abs_xi, decreasing = TRUE, na.last = NA), 4)
top_pairs <- arrayInd(top_cells, dim(abs_xi))
for (i in seq_len(nrow(top_pairs))) {
  idx1 <- top_pairs[i, 1]
  idx2 <- top_pairs[i, 2]
  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(XI_cor_mat[idx1, idx2], 3)))
}

# Scatter plots of the four pairs with the smallest |xi|.
# Cells are ranked with order() so every panel gets one distinct (row, column)
# pair. Taking row i of which(... == rev(XI_cor_vec)[i]) only works when at
# least i cells share that value; otherwise the subscript is out of bounds.
# Axis labels come from sub_dat, the table that is actually plotted.
par(mfrow = c(2, 2))

abs_xi <- abs(XI_cor_mat)
bottom_cells <- head(order(abs_xi, na.last = NA), 4)
bottom_pairs <- arrayInd(bottom_cells, dim(abs_xi))
for (i in seq_len(nrow(bottom_pairs))) {
  idx1 <- bottom_pairs[i, 1]
  idx2 <- bottom_pairs[i, 2]
  plot(sub_dat[, idx1], sub_dat[, idx2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
       ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"),
       main = paste0("Correlation of ", round(XI_cor_mat[idx1, idx2], 3)))
}

Find the indices of the variable pairs for which two dependence measures disagree (one is low while the other is high).

# Pairs with |Pearson| < 0.3 but |Spearman| > 0.7.
weak_pearson <- abs(cor_pearson_mat) < 0.3
strong_spearman <- abs(cor_spearman_mat) > 0.7
cor_contrast1 <- weak_pearson & strong_spearman
cor_contrast_ind1 <- which(cor_contrast1, arr.ind = TRUE)

# Pairs with |Pearson| > 0.8 but |Spearman| < 0.2.
strong_pearson <- abs(cor_pearson_mat) > 0.8
weak_spearman <- abs(cor_spearman_mat) < 0.2
cor_contrast2 <- strong_pearson & weak_spearman
cor_contrast_ind2 <- which(cor_contrast2, arr.ind = TRUE)
nrow(cor_contrast_ind2)
## [1] 90
# Pairs with |Pearson| < 0.2 but |Kendall| > 0.8.
weak_pearson <- abs(cor_pearson_mat) < 0.2
strong_kendall <- abs(cor_kendall_mat) > 0.8
cor_contrast3 <- weak_pearson & strong_kendall
cor_contrast_ind3 <- which(cor_contrast3, arr.ind = TRUE)
nrow(cor_contrast_ind3)
## [1] 0
# Pairs with |Pearson| > 0.8 but |Kendall| < 0.2.
strong_pearson <- abs(cor_pearson_mat) > 0.8
weak_kendall <- abs(cor_kendall_mat) < 0.2
cor_contrast4 <- strong_pearson & weak_kendall
cor_contrast_ind4 <- which(cor_contrast4, arr.ind = TRUE)
nrow(cor_contrast_ind4)
## [1] 703
# Pairs with |Pearson| < 0.2 but distance correlation > 0.6.
weak_pearson <- abs(cor_pearson_mat) < 0.2
strong_dist_cor <- dist_cor_mat > 0.6
cor_contrast5 <- weak_pearson & strong_dist_cor
cor_contrast_ind5 <- which(cor_contrast5, arr.ind = TRUE)

# Pairs with |Pearson| > 0.6 but distance correlation < 0.4.
strong_pearson <- abs(cor_pearson_mat) > 0.6
weak_dist_cor <- dist_cor_mat < 0.4
cor_contrast6 <- strong_pearson & weak_dist_cor
cor_contrast_ind6 <- which(cor_contrast6, arr.ind = TRUE)
nrow(cor_contrast_ind6)
## [1] 189
# Pairs with |Pearson| < 0.2 but |MIC| > 0.75.
weak_pearson <- abs(cor_pearson_mat) < 0.2
strong_mic <- abs(cor_MIC_mat) > 0.75
cor_contrast7 <- weak_pearson & strong_mic
cor_contrast_ind7 <- which(cor_contrast7, arr.ind = TRUE)
nrow(cor_contrast_ind7)
## [1] 56
# Pairs with |Pearson| > 0.75 but |MIC| < 0.25.
strong_pearson <- abs(cor_pearson_mat) > 0.75
weak_mic <- abs(cor_MIC_mat) < 0.25
cor_contrast8 <- strong_pearson & weak_mic
cor_contrast_ind8 <- which(cor_contrast8, arr.ind = TRUE)
nrow(cor_contrast_ind8)
## [1] 7
# Pairs with |Pearson| < 0.45 but |xi| > 0.55.
weak_pearson <- abs(cor_pearson_mat) < 0.45
strong_xi <- abs(XI_cor_mat) > 0.55
cor_contrast9 <- weak_pearson & strong_xi
cor_contrast_ind9 <- which(cor_contrast9, arr.ind = TRUE)
nrow(cor_contrast_ind9)
## [1] 8
# Pairs with |Pearson| > 0.85 but |xi| < 0.10.
strong_pearson <- abs(cor_pearson_mat) > 0.85
weak_xi <- abs(XI_cor_mat) < 0.10
cor_contrast10 <- strong_pearson & weak_xi
cor_contrast_ind10 <- which(cor_contrast10, arr.ind = TRUE)
nrow(cor_contrast_ind10)
## [1] 10

Visualization of low pearson (<0.3) and high spearman (>0.7)

# One scatter plot per pair listed in cor_contrast_ind1, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(2, 3))
for (i in seq_len(nrow(cor_contrast_ind1))) {
  index1 <- cor_contrast_ind1[i, 1]
  index2 <- cor_contrast_ind1[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Spearman of ",
                           round(cor_spearman_mat[index1, index2], 3))))
}

Visualization of low pearson (<0.2) and high distance correlation (>0.6)

# One scatter plot per pair listed in cor_contrast_ind5, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(2, 3))

for (i in seq_len(nrow(cor_contrast_ind5))) {
  index1 <- cor_contrast_ind5[i, 1]
  index2 <- cor_contrast_ind5[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Dist.Cor of ",
                           round(dist_cor_mat[index1, index2], 3))))
}

Visualization of low pearson (< 0.2) and high MIC (> 0.75)

# One scatter plot per pair listed in cor_contrast_ind7, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(2, 4))

for (i in seq_len(nrow(cor_contrast_ind7))) {
  index1 <- cor_contrast_ind7[i, 1]
  index2 <- cor_contrast_ind7[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Visualization of high pearson (> 0.75) and low MIC (< 0.25)

# One scatter plot per pair listed in cor_contrast_ind8, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(3, 3))

for (i in seq_len(nrow(cor_contrast_ind8))) {
  index1 <- cor_contrast_ind8[i, 1]
  index2 <- cor_contrast_ind8[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("MIC of ", round(cor_MIC_mat[index1, index2], 3))))
}

Visualization of low pearson (< 0.45) and high XI (> 0.55)

# One scatter plot per pair listed in cor_contrast_ind9, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(2, 2))

for (i in seq_len(nrow(cor_contrast_ind9))) {
  index1 <- cor_contrast_ind9[i, 1]
  index2 <- cor_contrast_ind9[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("XI of ", round(XI_cor_mat[index1, index2], 3))))
}

Visualization of high pearson (> 0.85) and low XI (< 0.10)

# One scatter plot per pair listed in cor_contrast_ind10, titled with both
# coefficients. seq_len() makes the loop a no-op when no pair was flagged;
# 1:nrow(x) would iterate over c(1, 0) and fail with a subscript error.
par(mfrow = c(2, 4))

for (i in seq_len(nrow(cor_contrast_ind10))) {
  index1 <- cor_contrast_ind10[i, 1]
  index2 <- cor_contrast_ind10[i, 2]
  plot(sub_dat[, index1], sub_dat[, index2], col = sub_cluster_labels, asp = 1,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"),
       main = paste(paste0("Pearson of ",
                           round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("XI of ", round(XI_cor_mat[index1, index2], 3))))
}